library(tidyverse)
library(tigris)
library(censusapi)
library(sf)
library(mapview)
library(plotly)
library(leaflet)
library(lehdr)
library(ggplot2)
options(
tigris_class = "sf",
tigris_use_cache = T
)
Sys.setenv(CENSUS_KEY="10dcd73d7c043e91bac9fb8d3989cbff54b08790")
Using the LODES dataset and the list of essential workers to map the distribution of essential workers by block group in San Jose
First get the block groups in San Jose
scc_blockgroups <- block_groups("CA","Santa Clara", cb=F, progress_bar=F)
# Use tracts sent to us by San Jose
dir <- "pCloud Drive/SFBI/Data Library"
sj_tracts <- st_read(file.path(dir,"San_Jose/CSJ_Census_Tracts/CSJ_Census_Tracts.shp")) %>%
st_as_sf() %>%
st_transform(st_crs(scc_blockgroups))
## Reading layer `CSJ_Census_Tracts' from data source `/Users/spencerrobinson/pCloud Drive/SFBI/Data Library/San_Jose/CSJ_Census_Tracts/CSJ_Census_Tracts.shp' using driver `ESRI Shapefile'
## Simple feature collection with 219 features and 9 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: 6112856 ymin: 1869687 xmax: 6255982 ymax: 1996555
## CRS: 2227
sj_citycouncil_disticts <- st_read(file.path(dir, "San_Jose/City Council Districts/CITY_COUNCIL_DISTRICTS.shp")) %>%
st_as_sf() %>%
st_transform(st_crs(scc_blockgroups))
## Reading layer `CITY_COUNCIL_DISTRICTS' from data source `/Users/spencerrobinson/pCloud Drive/SFBI/Data Library/San_Jose/City Council Districts/CITY_COUNCIL_DISTRICTS.shp' using driver `ESRI Shapefile'
## Simple feature collection with 10 features and 7 fields
## geometry type: POLYGON
## dimension: XY
## bbox: xmin: 6112856 ymin: 1869687 xmax: 6255982 ymax: 1996555
## CRS: 2227
# from code written by others to get SJ blockgroups
sj_blockgroups <-
scc_blockgroups %>%
st_centroid() %>%
st_join(sj_tracts, left = F) %>%
st_join(sj_citycouncil_disticts%>% dplyr::select(DISTRICTS)) %>%
mutate(
DISTRICTS = DISTRICTS %>% factor(levels = c("1","2","3","4","5","6","7","8","9","10"))
) %>%
st_set_geometry(NULL) %>%
left_join(scc_blockgroups%>% dplyr::select(GEOID), by = "GEOID") %>%
st_as_sf() %>%
dplyr::select(GEOID, DISTRICTS)
# the spatial join leaves off two blockgroups which are touching district 9. The following code assigns those to district 9
sj_blockgroups$DISTRICTS[is.na(sj_blockgroups$DISTRICTS)] <- 9
sj_boundary <-
places("CA", cb=F, progress_bar=F) %>%
filter(NAME == "San Jose")
Next obtain the distribution of workers from the LODES dataset
# define the LODES categories, from https://lehd.ces.census.gov/data/lodes/LODES7/LODESTechDoc7.3.pdf
LODES_variable <- c('CNS01', 'CNS02', 'CNS03', 'CNS04', 'CNS05', 'CNS05', 'CNS05', 'CNS06', 'CNS07', 'CNS07', 'CNS08', 'CNS08', 'CNS09', 'CNS10', 'CNS11', 'CNS12', 'CNS13', 'CNS14', 'CNS15', 'CNS16', 'CNS17', 'CNS18', 'CNS19', 'CNS20')
LODES_NAICS <- c('11', '21', '22', '23', '31', '32', '33', '42', '44', '45', '48', '49', '51', '52', '53', '54', '55', '56', '61', '62', '71', '72', '81', '92')
LODES_keys <- data.frame(LODES_variable, LODES_NAICS)
# get the LODES data
sj_rac <-
grab_lodes(
state = "ca",
year = 2017,
lodes_type = "rac",
job_type = "JT01",
segment = "S000",
state_part = "main",
agg_geo = "bg"
) %>%
left_join(sj_blockgroups, by = c("h_bg" = "GEOID")) %>%
filter(!is.na(DISTRICTS)) %>%
dplyr::select(-c(contains("CE"),contains("CA"),contains("CR"), contains("CT"),contains("CS"),contains("CD")))
Get QWI Data which tells us number of works in each 4 digit NAICS code
# Get distribution of jobs in Santa Clara County
#Get NAICS codes
label_industry <-
read_csv(file.path(dir,"NAICS/label_industry.csv"))
#GetQWI data
qwi_scc <- NULL
for(years in 2017){
qwi<-
getCensus(
name = "timeseries/qwi/sa",
region = "county:085",
regionin = "state:06",
vars = c("EmpS","EarnS","industry","ind_level"),
time = years
) %>%
filter(ind_level == 4) %>%
mutate(
year = substr(time,1,4)
) %>%
left_join(label_industry, by= "industry") %>%
group_by(year,industry,label) %>%
summarize(
EmpS = round(mean(as.numeric(EmpS), na.rm = TRUE),0),
EarnS = round(mean(as.numeric(EarnS), na.rm = TRUE),0)
) %>%
filter(!is.na(EmpS) & EmpS != 0)
qwi_scc<-
rbind(qwi_scc,qwi)
}
arrange(qwi, desc(EmpS))
## # A tibble: 268 x 5
## # Groups: year, industry [268]
## year industry label EmpS EarnS
## <chr> <chr> <chr> <dbl> <dbl>
## 1 2017 5415 Computer Systems Design and Related Services 74335 15200
## 2 2017 7225 Restaurants and Other Eating Places 54063 2217
## 3 2017 5191 Other Information Services 45339 34325
## 4 2017 3341 Computer and Peripheral Equipment Manufacturing 42593 21186
## 5 2017 3344 Semiconductor and Other Electronic Component Manu… 38376 19610
## 6 2017 6111 Elementary and Secondary Schools 36927 5059
## 7 2017 6221 General Medical and Surgical Hospitals 31975 8206
## 8 2017 6241 Individual and Family Services 24505 1623
## 9 2017 6113 Colleges, Universities, and Professional Schools 23194 8034
## 10 2017 5112 Software Publishers 21594 26285
## # … with 258 more rows
Get Essential Deisgnations
#Here I will use the Vulnerability Team's California Essential Business NAICS code list according to California State Public Health Officer's Report
dir <- "/Users/spencerrobinson/pCloud Drive/SFBI/Data Library"
CA_essential <- read_csv(file.path(dir,'Essential Designations/ca_essential_designations.csv'))
CA_essential<- CA_essential %>%
mutate(`CA Essential` = replace_na(`CA Essential`, FALSE))
CA_essential$`CA Essential` <- as.logical(CA_essential$`CA Essential`)
#Here I use Delaware's Essential Business NAICS code list (only goes to 4 digit)
DE_essential <- read_csv(file.path(dir, 'Essential Designations/delaware_essential_designations.csv'))
Method 1: Use the QWI data to find the distribution of workers in NAICS 4-digit codes in Santa Clara County, and use this to weight the contributions of each 2-digit code.
#CA Fraction Employed; CA designations
CA_essential_weighted_grouped_ca1 <- CA_essential %>%
mutate(NAICS = str_sub(NAICS, 1,4)) %>%
full_join(qwi_scc, by = c('NAICS' = 'industry')) %>%
mutate(NAICS_2_dig = substr(NAICS, 1, 2)) %>%
full_join(LODES_keys, by = c('NAICS_2_dig' = 'LODES_NAICS')) %>%
mutate(EmpS = replace_na(EmpS, 0)) %>% # assume if not listed it is not essential, and if no employees listed that the number is zero
group_by(LODES_variable, `CA Essential`) %>%
summarize(totalWorkers = sum(EmpS)) %>% # get number of 4 digit codes within each 2 digit code that are essential and nonessential and total workers essential/nonessential
ungroup() %>%
spread(`CA Essential`, totalWorkers, fill = 0) %>%
rename(c("Essential" = "TRUE", "Nonessential" = "FALSE")) %>%
mutate(totalCount = Essential + Nonessential, fracEssential = Essential / totalCount) %>%
mutate(df = "CA")
#CA Fraction Employed; DE designations
CA_essential_weighted_grouped_de1 <- DE_essential %>%
mutate(NAICS = str_sub(NAICS, 1,4)) %>%
full_join(qwi_scc, by = c('NAICS' = 'industry')) %>%
mutate(NAICS_2_dig = substr(NAICS, 1, 2)) %>%
full_join(LODES_keys, by = c('NAICS_2_dig' = 'LODES_NAICS')) %>%
mutate(EmpS = replace_na(EmpS, 0)) %>% # assume if not listed it is not essential, and if no employees listed that the number is zero
group_by(LODES_variable, Essential) %>%
summarize(totalWorkers = sum(EmpS)) %>% # get number of 4 digit codes within each 2 digit code that are essential and nonessential and total workers essential/nonessential
mutate(Essential = replace_na(Essential, TRUE)) %>% #CNS20 or NAICS code 92 isn't in the DE dataset and is given as NA, but it is public administration and we assume essential
ungroup() %>%
spread(Essential, totalWorkers, fill = 0) %>%
rename(c("Essential" = "TRUE", "Nonessential" = "FALSE")) %>%
mutate(totalCount = Essential + Nonessential, fracEssential = Essential / totalCount) %>%
mutate(df = "DE")
#Let's compare the fraction essential for California versus Delaware
compare_df <- bind_rows(CA_essential_weighted_grouped_ca1, CA_essential_weighted_grouped_de1)
compare_graph <- ggplot(data = compare_df, aes(x = LODES_variable, y = fracEssential, fill = df))+
geom_bar(stat = "identity", position = position_dodge())+
ggtitle(" Fraction Employed in Santa Clara; Comparing Delaware and California Designations")+
theme_minimal()+
theme(
plot.title = element_text(size=14, face="bold.italic"),
axis.text.x = element_text(angle = 90, size=14),
axis.title.y = element_text(size=14)
)
compare_graph
# get essential breakdown in San Jose - CA designations
sj_rac_essential_ca1 <- sj_rac %>% gather("Category", "Number", CNS01:CNS20) %>%
left_join(CA_essential_weighted_grouped_ca1 %>% dplyr::select(fracEssential, LODES_variable), by = c("Category" = "LODES_variable")) %>%
mutate(numEssential = fracEssential * Number) %>%
group_by(h_bg, C000) %>%
summarize(totalEssential = sum(numEssential)) %>%
ungroup() %>%
mutate(fracEssential = round((totalEssential / C000)*100, 1))
sj_rac_essential_de1 <- sj_rac %>% gather("Category", "Number", CNS01:CNS20) %>%
left_join(CA_essential_weighted_grouped_de1 %>% dplyr::select(fracEssential, LODES_variable), by = c("Category" = "LODES_variable")) %>%
mutate(numEssential = fracEssential * Number) %>%
group_by(h_bg, C000) %>%
summarize(totalEssential = sum(numEssential)) %>%
ungroup() %>%
mutate(fracEssential = round((totalEssential / C000)*100, 1))
Map of method 1 using California Essential Business Designations
# set up palette
blue_pal <- colorNumeric(
palette = "Blues",
domain =
c(sj_rac_essential_ca1 %>%
pull(fracEssential) %>%
unique())
)
ca_map1 <- leaflet(data = sj_rac_essential_ca1) %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addPolygons(
data =
sj_rac_essential_ca1 %>%
left_join(sj_blockgroups, by = c("h_bg" = "GEOID")) %>%
st_as_sf() %>% st_set_crs(4326),
fillColor = ~blue_pal(fracEssential),
color = "white",
weight = 0.25,
fillOpacity = 0.5,
label = ~paste0(fracEssential,"% of workers that are essential by block group "),
highlightOptions = highlightOptions(
weight = 2,
opacity = 1
)
)%>%
addLegend(pal = blue_pal, values = ~fracEssential, opacity = 1, title = "% Workers in Essential Bus.")
ca_map1
Map of method 1 using Delaware Essential Business Designations
red_pal <- colorNumeric(
palette = "Reds",
domain =
c(sj_rac_essential_de1 %>%
pull(fracEssential) %>%
unique())
)
de_map1 <- leaflet(data = sj_rac_essential_de1) %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addPolygons(
data =
sj_rac_essential_de1 %>%
left_join(sj_blockgroups, by = c("h_bg" = "GEOID")) %>%
st_as_sf() %>% st_set_crs(4326),
fillColor = ~red_pal(fracEssential),
color = "white",
weight = 0.25,
fillOpacity = 0.5,
label = ~paste0(fracEssential,"% of workers that are essential by block group "),
highlightOptions = highlightOptions(
weight = 2,
opacity = 1
)
) %>%
addLegend(pal = red_pal, values = ~fracEssential, opacity = 1, title = "% Workers in Essential Bus.")
de_map1
Method 2: Use fraction of 6-digit codes considered essential to create fraction of essential codes for each of the 4-digit codes. Then use Method 1 to determine fraction of essential workers for each 2-digit code.
#CA Fraction Employed; CA designations
CA_essential_fraction <- CA_essential %>%
mutate(NAICS = str_sub(NAICS, 1,6)) %>%
mutate(NAICS_4_dig = str_sub(NAICS, 1,4)) %>%
group_by(NAICS_4_dig, `CA Essential`) %>%
summarize(NAICS_factor = n()) %>%
spread(`CA Essential`, NAICS_factor) %>%
mutate(`FALSE` = replace_na(`FALSE`, 0), `TRUE` = replace_na(`TRUE`, 0)) %>%
mutate(total = `TRUE` + `FALSE`, fraction = `TRUE` / total)
CA_essential_fraction <- CA_essential_fraction[,-(2:4)]
#CA Fraction Employed; CA designations
CA_essential_weighted_grouped_ca2 <- CA_essential %>%
mutate(NAICS = str_sub(NAICS, 1,4)) %>%
full_join(CA_essential_fraction, by = c('NAICS' = 'NAICS_4_dig')) %>%
full_join(qwi_scc, by = c('NAICS' = 'industry')) %>%
mutate(NAICS_2_dig = substr(NAICS, 1, 2)) %>%
full_join(LODES_keys, by = c('NAICS_2_dig' = 'LODES_NAICS')) %>%
mutate(EmpS = replace_na(EmpS, 0)) %>% # assume if not listed it is not essential, and if no employees listed that the number is zero
{if(TRUE) mutate(., EmpS = fraction*EmpS) else . } %>% #Multiply number of essential employees by fraction of NAICS codes essential
group_by(LODES_variable, `CA Essential`) %>%
summarize(totalWorkers = sum(EmpS)) %>%
ungroup() %>%
spread(`CA Essential`,totalWorkers, fill = 0) %>%
rename(c("Essential" = "TRUE", "Nonessential" = "FALSE")) %>%
mutate(totalCount = Essential + Nonessential, fracEssential = Essential / totalCount) %>%
na.omit() %>%
mutate(df = "CA2")
compare_df <- bind_rows(CA_essential_weighted_grouped_ca1, CA_essential_weighted_grouped_ca2)
compare_graph <- ggplot(data = compare_df, aes(x = LODES_variable, y = fracEssential, fill = df))+
geom_bar(stat = "identity", position = position_dodge())+
ggtitle("Fraction Employed in Santa Clara; Comparing Delaware and California Designations")+
theme_minimal()+
theme(
plot.title = element_text(size=14, face="bold.italic"),
axis.text.x = element_text(angle = 90, size=14),
axis.title.y = element_text(size=14)
)
compare_graph
# get essential breakdown in San Jose - CA designations
sj_rac_essential_ca2 <- sj_rac %>% gather("Category", "Number", CNS01:CNS20) %>%
left_join(CA_essential_weighted_grouped_ca2 %>% dplyr::select(fracEssential, LODES_variable), by = c("Category" = "LODES_variable")) %>%
mutate(numEssential = fracEssential * Number) %>%
group_by(h_bg, C000) %>%
summarize(totalEssential = sum(numEssential, na.rm = T)) %>%
ungroup() %>%
mutate(fracEssential = round((totalEssential / C000)*100, 1))
Map of method 2 using California Essential Business Designations
# set up palette
blue_pal <- colorNumeric(
palette = "Blues",
domain =
c(sj_rac_essential_ca2 %>%
pull(fracEssential) %>%
unique())
)
ca_map2 <- leaflet(data = sj_rac_essential_ca2) %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addPolygons(
data =
sj_rac_essential_ca2 %>%
left_join(sj_blockgroups, by = c("h_bg" = "GEOID")) %>%
st_as_sf() %>% st_set_crs(4326),
fillColor = ~blue_pal(fracEssential),
color = "white",
weight = 0.25,
fillOpacity = 0.5,
label = ~paste0(fracEssential,"%"),
highlightOptions = highlightOptions(
weight = 2,
opacity = 1
)
) %>%
addLegend(pal = blue_pal, values = ~fracEssential, opacity = 1, title = "% Workers in Essential Bus.")
ca_map2